Export and plot results from MCMC output

Libraries, functions

library(tictoc)
library(ggplot2)
library(dplyr)
library(data.table)
names_functions = list.files(here::here("functions"))
for (f in names_functions)
    source(here::here("functions", f))
rm(f, names_functions)

Load “global” variables

parties <- load_parties()
states <- load_states()
regions <- load_regions()
states_regions <- load_dataland_states_regions()
n_parties_by_state <- load_n_parties_by_geography("state")
dates_campaign <- load_dates_election_campaign(year = 2024)
electoral_votes <- load_electoral_votes()
election_day <- load_election_day()

Load priors -> visualize along with posterior distribution of \pi_T

priors <- readRDS(here::here("priors", "priors.Rds"))

Generate results for different scenarios

scenarios <- load_scenarios()

List to store plots

plts <- list()

Define a function to generate csv output and plots for each scenario

gen_results <- function(plts, scen) {
    print(scen)
    out <- readRDS(here::here("model", paste0("mcmc_out_", scen, ".Rds")))

    df_polls <- read.csv(here::here("data", paste0("df_polls_", scen, ".csv")))
    df_polls$date <- as.Date(df_polls$date)
    # make non-zero polls more visible in plots
    df_polls$value = ifelse(df_polls$value == 0, 
                            NA, 
                            df_polls$value) 

    df_prior_ppi <- invert_alr_on_prior(priors[[scen]][["m_mmu_T"]])

    # use data.table -> much, much quicker than tidyverse!
    df_draws_ppi <- convert_draws_to_dt(
        rstan::extract(out, pars = "ppi")[["ppi"]],                        
        geographies = states,
        parties = parties,
        dates_campaign = dates_campaign)
    
    df_draws_ppi_reg <- convert_draws_to_dt(
        rstan::extract(out, pars = "ppi_reg")[["ppi_reg"]],
        geographies = regions,
        parties = parties,
        dates_campaign = dates_campaign)

    df_draws_ppi_nat <- convert_draws_to_dt(
        rstan::extract(out, pars = "ppi_nat")[["ppi_nat"]],                        
        geographies = "national",
        parties = parties, 
        dates_campaign = dates_campaign)

    tic("calc prob win election")
    df_prob_win_election <- do.call(
        "rbind", 
        lapply(
            dates_campaign, 
            FUN = calc_prob_win_election,
            df_draws_ppi = df_draws_ppi,
            df_draws_ppi_nat = df_draws_ppi_nat,
            states = states,
            parties = parties,
            electoral_votes = electoral_votes
            )
        )
    toc()
    tic("calc prob win states")
    df_prob_win_states <- do.call(
        "rbind", 
        lapply(
            dates_campaign, 
            FUN = calc_prob_win_states,
            df_draws_ppi = df_draws_ppi,
            states = states,
            parties = parties
            )
        ) 
    toc()

    # Export mean vote share and win probabilities to read 
    tic("calc mean vote states")
    df_draws_ppi %>% 
    group_by(t, party, geography) %>% 
    summarise(mean_vote_share = mean(values)) %>%
    rename(date = t, province = geography) %>%
    mutate(party = tolower(party)) %>%
    tidyr::pivot_wider(
        names_from = "party", 
        values_from = "mean_vote_share",
        names_glue = "{party}_{.value}") -> df_out_mean
    toc()
    
    df_prob_win_states %>%
    mutate(party = tolower(party)) %>%
    rename(province = geography) %>%
    tidyr::pivot_wider(
        names_from = "party", 
        values_from = "prob_win",
        names_glue = "{party}_{.value}") -> df_out_prob_win_states
    
    write.csv(
        merge(
            # only export until day of latest available poll
            filter(df_out_mean, date <= max(df_polls$date)), 
            filter(df_out_prob_win_states, date <= max(df_polls$date)),
            by = c("date", "province")
        ), 
        file = here::here(paste0("provincial_forecast_", scen, ".csv")),
        row.names = FALSE
    )
    tic("calc mean vote share national")
    df_draws_ppi_nat %>% 
        select(-geography) %>%
        group_by(t, party) %>% 
        summarise(mean_vote_share = mean(values)) %>%
        rename(date = t) %>%
        mutate(party = tolower(party)) %>%
        tidyr::pivot_wider(
            names_from = "party", 
            values_from = "mean_vote_share",
            names_glue = "{party}_{.value}") -> df_out_mean_nat
    toc()
    tic("transform win prob election for export")
    df_prob_win_election %>%
        mutate(party = tolower(party)) %>%
        tidyr::pivot_wider(
            names_from = "party", 
            values_from = "prob_win",
            names_glue = "{party}_{.value}") -> df_out_prob_win_election
    toc()
    
    write.csv(
        merge(
            # only export until day of latest available poll
            filter(df_out_mean_nat, date <= max(df_polls$date)), 
            filter(df_out_prob_win_election, date <= max(df_polls$date)), 
            by = "date"
        ), 
        file = here::here(paste0("national_forecast_", scen, ".csv")),
        row.names = FALSE
    )

    # Plots    
    
    plot_prob_win_election(
        df_prob_win_election, 
        election_day, 
        scen) -> plts[[scen]][["plt_prob_win_election"]]

    tic("plot win prob election over time")
    plot_prob_win_election_over_time(
        df_prob_win_election, 
        scen) -> plts[[scen]][["plt_prob_win_election_over_time"]]
    toc()
    plot_prob_win_states(
        df_prob_win_states,
        election_day,
        scen) -> plts[[scen]][["plt_prob_win_states"]]

    plot_prob_win_states_over_time(
        df_prob_win_states, 
        scen) -> plts[[scen]][["plt_prob_win_states_over_time"]]

    plot_ppiT(
        df_draws_ppi, 
        df_prior_ppi, 
        election_day, 
        n_geographies = length(states), 
        scen) -> plts[[scen]][["plt_ppiT"]]
    
    plot_ppi(
        df_draws_ppi, 
        filter(df_polls, scenario == scen), 
        n_geographies = length(states), 
        type_of_poll = "state", 
        plt_title_prefix = scen) -> plts[[scen]][["plt_ppi"]]

    plot_ppi(
        df_draws_ppi_reg, 
        filter(df_polls, scenario == scen), 
        n_geographies = length(regions), 
        type_of_poll = "regional", 
        plt_title_prefix = scen) -> plts[[scen]][["plt_ppi_reg"]]    
    

    plot_ppi(
        df_draws_ppi_nat, 
        filter(df_polls, scenario == scen), 
        n_geographies = 1, 
        type_of_poll = "national", 
        plt_title_prefix = scen) -> plts[[scen]][["plt_ppi_nat"]]
    rm(df_draws_ppi, 
        df_draws_ppi_reg, 
        df_draws_ppi_nat, 
        df_polls,
        df_prior_ppi,
        df_prob_win_election,
        df_prob_win_states,
        df_out_prob_win_election,
        df_out_prob_win_states,
        df_out_mean, 
        df_out_mean_nat,
        out)
    plts 
}

Loop over scenarios

for (scen in scenarios) {
    plts <- gen_results(plts, scen)
}
[1] "A"
calc prob win election: 546.31 sec elapsed
calc prob win states: 492.19 sec elapsed
calc mean vote states: 0.9 sec elapsed
calc mean vote share national: 0.09 sec elapsed
transform win prob election for export: 0.05 sec elapsed
plot win prob election over time: 0.02 sec elapsed
[1] "B"
calc prob win election: 816.97 sec elapsed
calc prob win states: 491.44 sec elapsed
calc mean vote states: 0.72 sec elapsed
calc mean vote share national: 0.11 sec elapsed
transform win prob election for export: 0.01 sec elapsed
plot win prob election over time: 0.04 sec elapsed
[1] "C"
calc prob win election: 673.12 sec elapsed
calc prob win states: 492.21 sec elapsed
calc mean vote states: 0.73 sec elapsed
calc mean vote share national: 0.11 sec elapsed
transform win prob election for export: 0 sec elapsed
plot win prob election over time: 0.03 sec elapsed
[1] "D"
calc prob win election: 896.33 sec elapsed
calc prob win states: 490.01 sec elapsed
calc mean vote states: 0.75 sec elapsed
calc mean vote share national: 0.09 sec elapsed
transform win prob election for export: 0.01 sec elapsed
plot win prob election over time: 0.02 sec elapsed
[1] "E"
calc prob win election: 554.75 sec elapsed
calc prob win states: 484.91 sec elapsed
calc mean vote states: 0.87 sec elapsed
calc mean vote share national: 0.08 sec elapsed
transform win prob election for export: 0.01 sec elapsed
plot win prob election over time: 0 sec elapsed

Plot results